# Script to produce figure B.5

# Keep strings as character vectors when data frames are built.
# Spelled out as FALSE because the shorthand `F` is an ordinary variable that can be reassigned.
options(stringsAsFactors = FALSE)
library(grid)
library(gridExtra)
library(quanteda)
library(dplyr)
library(ggplot2)
library(patchwork)

# Import data: restore the objects stored in the .rdata file into the workspace.
# The code below reads `ecb_corpus`, presumably provided by this file (verify).
load("ecb_corpus.rdata")

# Speaker Normalization

# Values of the Speaker column that are excluded from the analysis
list_out <- c("raus","cf01","cf02","cf03","cf04","cf05","cf06","cf07","cf08","cf09","cf10","cf11","cf12","cf13",
              "cf14","cf15","cf16","cf17","cf99","cf","CF","Mario_Draghi_Interview","Ignazio_Visco_Interview",
              "Benoit_Coeure and Joerg_Asmussen","Hermann_Remsperger; Elmar_Stoess","Benoit_Coeure and Joachim Nagel",
              "Tommaso_Padoa_Schioppa","Jean_Claude_Trichet_Quotes","Working_Document","AREL","cultural")

# Drop rows with a missing or excluded speaker, then turn underscores into spaces
ecb_corpus2 <- ecb_corpus[!is.na(ecb_corpus$Speaker) & !ecb_corpus$Speaker %in% list_out, ]
ecb_corpus2$Speaker <- gsub("_", " ", ecb_corpus2$Speaker)

# Append the code of the speaker's central bank, e.g. "<name> (ECB)".
bank_codes <- c(
  "Bank of Italy"  = "IT",
  "Bank of France" = "FR",
  "Bank of Spain"  = "ES",
  "Bundesbank"     = "DE",
  "ECB"            = "ECB"
)
# as.character() guards against factor columns, which would otherwise index
# the lookup by integer level code instead of by name.
bank_code <- unname(bank_codes[as.character(ecb_corpus2$Central_Bank)])
# Rows whose bank is missing or not in the lookup keep their name untouched;
# a logical mask containing NA would make the assignment fail.
has_code <- !is.na(bank_code)
ecb_corpus2$Speaker[has_code] <- paste0(
  ecb_corpus2$Speaker[has_code], " (", bank_code[has_code], ")"
)

# Defining Four Time Periods and Three Topics

# a: the years (as character strings) belonging to each period
a <- list(
  as.character(1999:2004),
  as.character(2005:2009),
  as.character(2010:2015),
  as.character(2016:2019)
)
# b: the label of each period, in the same order as `a`
b <- list(
  "1999-2004",
  "2005-2009",
  "2010-2015",
  "2016-2019"
)

# c: the search terms of each topic (note: this variable shares its name with base::c)
c <- list(
  c("structural_reform", "structural_policy"),
  c("competitiveness", "competitive"),
  c("labour_market", "labor_market")
)
# d: the label of each topic, in the same order as `c`
d <- list(
  "Structural reform",
  "Competitiveness",
  "Labor market"
)

# Accumulators that the counting steps below append to
ecb_corpus_Speaker_all <- NULL
ecb_corpus_words_vis <- NULL

# Counting by groups (Speaker & Time)

# Number of speeches per speaker within each period, stacked into one table
# with the columns Speaker, all_Speeches and Period.
# The per-period tables are built ungrouped and combined once with bind_rows(),
# rather than growing a grouped tibble with rbind() inside a loop.
ecb_corpus_Speaker_all <- bind_rows(lapply(seq_along(a), function(i) {
  period_label <- b[[i]]
  ecb_corpus2[ecb_corpus2$Year %in% a[[i]], ] %>%
    count(Speaker, name = "all_Speeches") %>%
    # mutate() also works when a period has no speeches (zero rows)
    mutate(Period = period_label)
}))

# For every topic and period, count per speaker the speeches that contain at
# least one of the topic's search terms.

# Tokenisation and n-gram construction do not depend on the topic, so they are
# done once per period instead of once per topic and period. Only tokens that
# match some search term are retained, which keeps the stored objects small.
# `c` below is the list of search terms defined above, not base::c.
all_terms <- unlist(c)
period_speeches <- lapply(a, function(years) {
  ecb_corpus2[ecb_corpus2$Year %in% years, ]
})
period_tokens <- lapply(period_speeches, function(speeches) {
  corpus(speeches$Lemma) %>%
    tokens(remove_punct = TRUE, remove_symbols = TRUE, remove_numbers = TRUE) %>%
    tokens_ngrams(n = 1:2) %>%
    tokens_keep(all_terms)
})

# A speaker is kept for a topic/period only with more than this many matching speeches
min_matching_speeches <- 3

topic_counts <- vector("list", length(c) * length(a))
for (j in seq_along(c)) {
  for (i in seq_along(a)) {
    # Occurrences of the topic's search terms in each speech of the period
    term_hits <- period_tokens[[i]] %>%
      tokens_keep(c[[j]]) %>%
      dfm() %>%
      rowSums()

    period_label <- b[[i]]
    topic_label <- d[[j]]
    # Ungrouped per-speaker counts, largest first; mutate() tolerates the case
    # where no speaker passes the threshold (zero rows).
    speaker_counts <- period_speeches[[i]][term_hits > 0, ] %>%
      count(Speaker) %>%
      filter(n > min_matching_speeches) %>%
      arrange(desc(n)) %>%
      mutate(Period = period_label, Topic = topic_label)

    topic_counts[[(j - 1L) * length(a) + i]] <- speaker_counts
    print(paste0(i, " ", j))  # progress indicator
  }
}
# Combine once at the end instead of rbind()-ing inside the loop
ecb_corpus_words_vis <- bind_rows(topic_counts)
# Attach each speaker's total number of speeches in the period and express the
# topic count as a share of that total.
ecb_corpus_words_vis <- merge(ecb_corpus_words_vis, ecb_corpus_Speaker_all,
                              by = c("Speaker", "Period"), all.x = TRUE)
ecb_corpus_words_vis$Relative_Talk <- ecb_corpus_words_vis$n / ecb_corpus_words_vis$all_Speeches

# Keep ECB speakers only. fixed = TRUE makes the parentheses literal; read as a
# regular expression, "(ECB)" is a group and would match any name containing "ECB".
ecb_corpus_words_vis <- ecb_corpus_words_vis[
  grepl("(ECB)", ecb_corpus_words_vis$Speaker, fixed = TRUE),
]


# Shorten the speaker labels

# Inner call: remove the literal " (ECB)" tag.
# Outer call: delete everything up to and including the last space, so only
# the final word of each name is left.
ecb_corpus_words_vis$Speaker <- gsub(
  ".* ", "",
  gsub(" (ECB)", "", ecb_corpus_words_vis$Speaker, fixed = TRUE)
)

# Top speakers per period for the "Structural reform" topic

# Return the (at most) `k` rows of `shares` for the given topic and period with
# the highest Relative_Talk, in decreasing order.
top_speakers <- function(shares, period, topic = "Structural reform", k = 5) {
  rows <- shares[shares$Topic == topic & shares$Period == period, ]
  rows <- rows[order(rows$Relative_Talk, decreasing = TRUE), ]
  # head() returns fewer rows when fewer are available; indexing with 1:k
  # would pad the result with all-NA rows instead.
  head(rows, k)
}

str_1 <- top_speakers(ecb_corpus_words_vis, "1999-2004")
str_2 <- top_speakers(ecb_corpus_words_vis, "2005-2009")
str_3 <- top_speakers(ecb_corpus_words_vis, "2010-2015")
str_4 <- top_speakers(ecb_corpus_words_vis, "2016-2019")

#Plot

# Build one horizontal bar chart: speakers on the vertical axis (ordered by
# their share), Relative_Talk on the horizontal axis.
#   top_rows     data with the columns Speaker and Relative_Talk
#   period_title text shown as the panel title
#   share_label  text shown as the label of the share axis
period_barplot <- function(top_rows, period_title, share_label) {
  ggplot(data = top_rows, aes(x = reorder(Speaker, Relative_Talk), y = Relative_Talk)) +
    geom_bar(stat = "identity") +
    coord_flip() +
    xlab("") +
    ylab(share_label) +
    ggtitle(period_title) +
    theme_bw()
}

# The first two panels carry no share-axis label; the last two do
gg_comp_1 <- period_barplot(str_1, "1999-2004", "")
gg_comp_2 <- period_barplot(str_2, "2005-2009", "")
gg_comp_3 <- period_barplot(str_3, "2010-2015", "Share of speeches, 1=100%")
gg_comp_4 <- period_barplot(str_4, "2016-2019", "Share of speeches, 1=100%")

# Arrange the four period panels in a 2 x 2 grid (patchwork operators) and draw it
fig_b5 <- (gg_comp_1 + gg_comp_2) / (gg_comp_3 + gg_comp_4)
print(fig_b5)

# Make sure the output directory exists, then save the composed figure.
# The plot is passed explicitly so the saved file does not depend on which plot
# ggplot2 happens to remember as the "last plot".
dir.create("figures", showWarnings = FALSE, recursive = TRUE)
ggsave("figures/fig5.pdf", plot = fig_b5)
